Project Description¶
Working with time series data matters because it shows us how quantities change over time. By studying trends, patterns, and seasonal effects in data, we can forecast future values, detect unusual events, and make better-informed decisions.
In this project, we will use our pandas data manipulation skills to analyze time series data that tracks the tide levels of the Thames River over many years.
Time series data is common in many areas, such as tracking stock prices, monitoring climate change, or following the spread of disease during a pandemic. Here, we focus on tide data from the Thames River. We will compute summary statistics on the data and then break the time series down into its main components for deeper analysis.
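The "main components" idea can be previewed with a tiny pandas-only sketch: a rolling mean separates a smooth trend from short-term fluctuation. The data below is synthetic, not the Thames file:

```python
import pandas as pd

# Toy series: a rising trend plus a tidal-style up-down oscillation
dates = pd.date_range("2020-01-01", periods=8, freq="D")
values = [0.0, 2.0, 1.0, 3.0, 2.0, 4.0, 3.0, 5.0]
series = pd.Series(values, index=dates)

# A rolling mean smooths out the oscillation, leaving the trend
trend = series.rolling(window=2).mean()

# The remainder (observed minus trend) isolates the short-term component
remainder = series - trend
```

A fuller decomposition (for example, `seasonal_decompose` from statsmodels) follows the same observed = trend + remainder logic, with an explicit seasonal term.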
The original dataset comes from the British Oceanographic Data Centre (BODC).
For this project, we will work with the file 10-11_London_Bridge.txt. This file contains comma-separated values recording water levels in the Thames River at London Bridge. Once we finish this project, we can apply the same methods to analyze similar data from other locations in the UK where tidal measurements are collected.
The TXT file contains three important variables, described below:
| Variable Name | Description | Format |
|---|---|---|
| Date and time | The date and time when the measurement was taken, in GMT. The tide gauge measures with one-minute accuracy. | dd/mm/yyyy hh:mm:ss |
| Water level | The height of the tide, either high or low water level, measured by the tide meter. The measurements are accurate to one centimetre. | metres (using Admiralty Chart Datum, Ordnance Datum Newlyn, or Trinity High Water) |
| Flag | Indicates whether the measurement is high water (1) or low water (0). | Categorical (0 or 1) |
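Because the timestamps are day-first, parsing them needs care: pandas defaults to month-first for ambiguous dates. A minimal check, assuming only pandas:

```python
import pandas as pd

# "01/05/1911" means 1 May 1911 in this dataset's dd/mm/yyyy format
stamp = pd.to_datetime("01/05/1911 15:40:00", dayfirst=True)

# Without dayfirst=True, pandas would read it month-first as 5 January 1911
default = pd.to_datetime("01/05/1911 15:40:00")
```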
Through this project, we will practice cleaning, summarizing, and analyzing time series data, skills that are valuable in many fields, including environmental science, finance, and health.
# Package imports
import pandas as pd
def IQR(column):
    """Calculate the interquartile range (IQR) of a column using the quantile method."""
    q25, q75 = column.quantile([0.25, 0.75])
    return q75 - q25
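Before applying `IQR` to real data, the quantile arithmetic can be sanity-checked on a toy Series whose quartiles are easy to read off (with pandas' default linear interpolation):

```python
import pandas as pd

# Toy series with easy quartiles: Q1 = 2.0, Q3 = 4.0 under linear interpolation
demo = pd.Series([1, 2, 3, 4, 5])
q25, q75 = demo.quantile([0.25, 0.75])
iqr = q75 - q25  # the same arithmetic IQR() performs
```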
# Load the data from London Bridge
lb = pd.read_csv('10-11_London_Bridge.txt')
lb.head(3)
| | Date and time | water level (m ODN) | flag | HW=1 or LW=0 |
|---|---|---|---|---|
| 0 | 01/05/1911 15:40:00 | 3.7130 | 1 | NaN |
| 1 | 02/05/1911 11:25:00 | -2.9415 | 0 | NaN |
| 2 | 02/05/1911 16:05:00 | 3.3828 | 1 | NaN |
Find the mean, median, and interquartile range for high- and low-tide data.¶
# Take only the first three columns
df = lb.iloc[:, :3]
# Rename columns
df.columns = ['datetime', 'water_level', 'is_high_tide']
df.head(3)
| | datetime | water_level | is_high_tide |
|---|---|---|---|
| 0 | 01/05/1911 15:40:00 | 3.7130 | 1 |
| 1 | 02/05/1911 11:25:00 | -2.9415 | 0 |
| 2 | 02/05/1911 16:05:00 | 3.3828 | 1 |
# Convert to datetime
df['datetime'] = pd.to_datetime(df['datetime'], dayfirst=True)
# Convert to float
df['water_level'] = df['water_level'].astype(float)
# Create extra month and year columns for easy access
df['month'] = df['datetime'].dt.month
df['year'] = df['datetime'].dt.year
# Filter df for high and low tide
tide_high = df[df['is_high_tide'] == 1]
tide_low = df[df['is_high_tide'] == 0]
# Create summary statistics
high_statistics = tide_high['water_level'].agg(['mean', 'median', IQR])
low_statistics = tide_low['water_level'].agg(['mean', 'median', IQR])
high_statistics
mean      3.318373
median    3.352600
IQR       0.743600
Name: water_level, dtype: float64
low_statistics
mean     -2.383737
median   -2.412900
IQR       0.538200
Name: water_level, dtype: float64
Calculate the annual ratio of very-high-tide days (above the 90th percentile of high-tide levels) and very-low-tide days (below the 10th percentile of low-tide levels).¶
# Calculate ratio of high tide days
all_high_days = tide_high.groupby('year')['water_level'].count()
very_high_days = tide_high[tide_high['water_level'] > tide_high['water_level'].quantile(0.90)].groupby('year')['water_level'].count()
very_high_ratio = (very_high_days/all_high_days).reset_index()
very_high_ratio
| | year | water_level |
|---|---|---|
| 0 | 1911 | 0.004098 |
| 1 | 1912 | 0.032316 |
| 2 | 1913 | 0.082212 |
| 3 | 1914 | 0.055313 |
| 4 | 1915 | 0.045045 |
| ... | ... | ... |
| 80 | 1991 | 0.096317 |
| 81 | 1992 | 0.103253 |
| 82 | 1993 | 0.145923 |
| 83 | 1994 | 0.150355 |
| 84 | 1995 | 0.170213 |
85 rows × 2 columns
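To see how the ratio above is built, here is the same pattern on synthetic numbers (not Thames data): one global 90th-percentile threshold, then each year's exceedance count divided by that year's total count. The `fillna(0)` is a choice made for this sketch; in the notebook's version, years without extremes simply come out as NaN.

```python
import pandas as pd

# Synthetic stand-in for tide_high: two years, five readings each
toy = pd.DataFrame({
    "year": [2000] * 5 + [2001] * 5,
    "water_level": [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0],
})

# The threshold is one global quantile over all years, not a per-year quantile
threshold = toy["water_level"].quantile(0.90)  # 9.1 by linear interpolation

# Per-year ratio: extreme observations divided by all observations
all_days = toy.groupby("year")["water_level"].count()
extreme_days = toy[toy["water_level"] > threshold].groupby("year")["water_level"].count()
ratio = (extreme_days / all_days).fillna(0)  # years with no extremes get 0
```

Only the value 10.0 exceeds the threshold, so 2001 gets a ratio of 1/5 and 2000 gets 0.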
# Calculate ratio of low tide days
all_low_days = tide_low.groupby('year')['water_level'].count()
very_low_days = tide_low[tide_low['water_level'] < tide_low['water_level'].quantile(0.10)].groupby('year')['water_level'].count()
very_low_ratio = (very_low_days/all_low_days).reset_index()
very_low_ratio
| | year | water_level |
|---|---|---|
| 0 | 1911 | 0.060606 |
| 1 | 1912 | 0.066667 |
| 2 | 1913 | 0.022388 |
| 3 | 1914 | 0.039017 |
| 4 | 1915 | 0.033435 |
| ... | ... | ... |
| 80 | 1991 | 0.150355 |
| 81 | 1992 | 0.107496 |
| 82 | 1993 | 0.112696 |
| 83 | 1994 | 0.106383 |
| 84 | 1995 | 0.107801 |
85 rows × 2 columns
Create a dictionary named solution with a summary of the analysis.¶
solution = {
    'high_statistics': high_statistics,
    'low_statistics': low_statistics,
    'very_high_ratio': very_high_ratio,
    'very_low_ratio': very_low_ratio,
}
print(solution)
{'high_statistics': mean      3.318373
median    3.352600
IQR       0.743600
Name: water_level, dtype: float64, 'low_statistics': mean     -2.383737
median   -2.412900
IQR       0.538200
Name: water_level, dtype: float64, 'very_high_ratio':     year  water_level
0   1911     0.004098
1   1912     0.032316
2   1913     0.082212
3   1914     0.055313
4   1915     0.045045
..   ...          ...
80  1991     0.096317
81  1992     0.103253
82  1993     0.145923
83  1994     0.150355
84  1995     0.170213

[85 rows x 2 columns], 'very_low_ratio':     year  water_level
0   1911     0.060606
1   1912     0.066667
2   1913     0.022388
3   1914     0.039017
4   1915     0.033435
..   ...          ...
80  1991     0.150355
81  1992     0.107496
82  1993     0.112696
83  1994     0.106383
84  1995     0.107801

[85 rows x 2 columns]}